// Session setup - interpreter pinned to Stata v14 so that the code keeps working under later releases
version 14							// From Stata v12 onwards there is no need to set mem
* Start from a clean slate: data, stored estimates, Stata matrices and Mata
clear
estimates clear
clear matrix
clear mata
* Session-wide settings (maxvar is changed only after the data have been cleared above)
set maxvar 20000
set more off
pause on
* .ado files used in these .do files
// Each check runs -which- for a user-written command; when it is not found (_rc is non-zero) the package
// that provides it is installed from the archive named in from().
// NOTE (original author): none of these worked automatically - I had to use findit (e.g. 'findit svmat2') and click through to install
capture noisily which estout
	if _rc~=0		ssc install estout
capture noisily which svmat2
	if _rc~=0		net install dm79, from(http://www.stata.com/stb/stb56)		// svmat2 is in STB-56 package dm79. The earlier 'dm79.pkg' form didn't work for the original author; if this fails too, use 'findit dm79' and go from there
capture noisily which misschk
	if _rc~=0		net install spost9_ado, from(https://jslsoc.sitehost.iu.edu/stata/)
capture noisily which renvars
	if _rc~=0		net install dm88_1, from(http://www.stata-journal.com/software/sj5-4)		// renvars is in Stata Journal 5-4 package dm88_1; without from() -net install- only works if a net location has already been set

	
// File locations
// ${user} is not defined in this file, so it must be set before this file is run; it is echoed here so the log shows which root was used
dis "${user}"
	if `"${user}"' == ""		dis as error "WARNING: global user is empty - dodir, countryleveldir and refstatsdir below will not point anywhere useful"
global dodir 			"${user}\OneDrive - King's College London\Disability work\ESRC Future Leaders Disability\Phase 1 (Dis Emp Rates) - Intl\EHIS"
global countryleveldir	"${user}\OneDrive - King's College London\Disability work\ESRC Future Leaders Disability\Phase 1 (Dis Emp Rates) - Intl\1_Ecological analyses & charts"		// For merging into stage 2 models
global refstatsdir 		"${user}\OneDrive - King's College London\WS\x. Ref WS stats"																	// For the OECD policy scores in full
* Secure datasets are on the desktop, which is a different location on different BBG computers
* Each candidate folder is tried in turn with -cd-; every one that exists becomes both the working directory
* and ${datadir}, so when more than one exists the LAST one in the list wins
foreach candidate in															///
		"C:\Users\bpb\Desktop\Datasets Misc"								/// old computer when this was first being run
		"C:\Users\benba\Desktop\Datasets Misc"								/// 2021-2ish work laptop bought by myself
		"C:\Users\k2256879\Datasets Misc"									/// NOTE(review): the original comment here duplicated the line above - confirm which computer this is
		"C:\Users\benba\Datasets Misc" {
	capture cd "`candidate'"
	if _rc==0		global datadir "`candidate'"
}
* Warn (without stopping) when no data folder was found, rather than silently building paths such as "\EHIS"
if `"${datadir}"' == ""		dis as error "WARNING: none of the known Datasets Misc folders exists on this computer - global datadir is empty, so the EHIS paths below will be wrong"
* EHIS
global EHIS					"$datadir\EHIS"
global UK_EHIS				"$EHIS\UK waves 2 & 3 (SN7881)\stata\stata13"
	
	
// Follow-on dirs (that are within the folders above)
global logdir		"${dodir}\z. Logs"
	* Close any log that is still open (the error raised when there is none is ignored)
	capture log close
	* Year, month and day of today's date, read from c(current_date) with a day-month-year mask;
	* they are used as-is (no zero padding) in the log filename
	local cyr  = year(date(c(current_date), "DMY"))
	local cmth = month(date(c(current_date), "DMY"))
	local cday = day(date(c(current_date), "DMY"))
	* One plain-text log per calendar day; re-running on the same day appends to that day's file
	log using "${logdir}\Log `cyr'-`cmth'-`cday'.txt", text append
* Version label for this run, plus a dataset note recording what v1 refers to
global versno 	"v1_0"
	notes _dta: v1 is 2023 version for OECD report


// Globals for analysis
// These globals name the variables and variable lists used by the analysis; each is defined once here so that it does not need replacing throughout
// *need turning into locals to be part of any loops (as can't have two '{' on the same line))
* Disability globals 
/* NOTE that IRT models require categorical variables - you just can't use a continuous variable */
// General things
	global empvar				"workB"
	global countryvar			"countrynum"
	* Weights for this dataset
	global pweight				"wgt"				// Useful to put this in a global, otherwise needs replacing throughout
// Variable list
	global disvar				"llsiB"
	global predictedvar			"${disvar}_predicted"
		global indicators			"i.PHQgroup i.pn1 i.pl2R i.pl4R i.pl5R i.pl6and7R"		// the ones used for predicted disability - have removed  i.bmigroup  (not available Italy, causes estimation probs)
	global IRTvar				"${disvar}_IRT"		// ${disvar}_IRT1 was a problem - see note in 4_dis_weights.do
		global IRTbinary			""																// the ones used for the IRT models
		global IRTordinal			"  PHQgroup   pn1   pl2R   pl4R   pl5R   pl6and7R   "			// the ones used for the IRT models
		global IRTnominal			""			// have removed bmigroup (not available Italy, causes estimation probs)
	* The 'all' IRT lists start from the lists above; only the binary list adds further items (cd1aR-cd1pR)
	global allIRTvar			"${disvar}_allIRT"
		global allIRTbinary		"${IRTbinary}  cd1aR cd1bR cd1cR cd1dR cd1eR cd1fR cd1gR cd1hR cd1iR cd1jR cd1kR cd1lR cd1mR cd1nR cd1oR cd1pR "
		global allIRTordinal	"${IRTordinal}" 
		global allIRTnominal	"${IRTnominal}" 
// Controls
	* For the predicted disability variable
	global disweight_controls	"sex ${empvar}"
	global disweight_factors 	"i.age i.${countryvar}"			// Factor variable controls, expanding through XI in the dis_weights .do file. i.hatlevel removed as doesn't look comparable. Built from ${countryvar} (expands to i.countrynum) so the country variable is named in one place only
	* // For the actual bootstrap models
	global controls				"i.age "
		* Note changing age: I have now set AGE in the MARGINS commands to be the mean age in the sample, to try to ensure it produces similar prevalence to observed disability
		* The commented-out eurpop globals below hold the European standard population 2013 shares, previously used in the MARGINS commands - from Annex F of https://ec.europa.eu/eurostat/documents/3859598/5926869/KS-RA-13-028-EN.PDF.pdf/e713fa79-1add-44e8-b23d-5e8fa09b3f8f?t=1414782757000 (see also https://www.ons.gov.uk/aboutus/transparencyandgovernance/freedomofinformationfoi/onsuseoftherevisedeuropeanstandardpopulation20132013espformortalityfigures)
		* global eurpop = "9.age=0.28 10.age=0.26 11.age=0.24 12.age=0.22"		// // Note that the models differ to ELSA-SHARE-HRS because we don't have single year age
		* FOR EHIS YOUNGER AGES: global eurpop = "3.age=0.093 4.age=0.093 5.age=0.1008 6.age=0.1085 7.age=0.1085 8.age=0.1085 9.age=0.1085 10.age=0.1008 11.age=0.093 12.age=0.0854"
		* FOR EHIS YOUNGER AGES: If excluding 65-69 yos: global eurpop = "3.age=0.1017 4.age=0.1017 5.age=0.1102 6.age=0.1186 7.age=0.1186 8.age=0.1186 9.age=0.1186 10.age=0.1102 11.age=0.1018"
	global controls2			"i.sex"						//  Have taken out i.hatlevel, mirroring ELSA/SHARE/HRS decision
		// controlsmeans is set in the master file, based on the avgs within the working dataset - should be set to missing correctly where controls2 is empty
